wCer2Modeling
26/1/2023
# load the libraries
library(gstat) # geostatistics
library(sf) # spatial data handling
library(sp) # spatial data handling
library(ggplot2) # for ploting
library(ggmap) # maps
library(dplyr) # data manipulation
library(tidyr) # data manipulation
library(tidyverse) # data manipulation
library(tsibble) # data maipulation
library(ggpubr) # visualisation
library(geosphere) # geo maps
library(RColorBrewer) # data visualisation
library(maptools) # geo maps
library(geodist) # geo maps
library(rnaturalearth) # river data
library(viridis) # data visualisation
library(scatterpie) # data visualisation
library(circular) # coordinates
library(Hmisc) # statistics
library(reshape2) # data manipulationFirst import and save the maps
#####################
### Map of Europe
#####################
####### make a white and grey map
map <- get_stamenmap( bbox = c(left = -11, bottom = 35, right = 45, top = 61), zoom = 4, maptype = "toner-background")
## Rivers
# download river shape file to plot Danube on the map
#rivers110 <- ne_download(scale = 110, type = 'rivers_lake_centerlines', category = 'physical')
# transform the shape file into a data frame read by ggplot
#rivers_df <- fortify(rivers110)
# and save
#write.csv(rivers_df, "/Volumes/LaCie/rivers_df.csv", row.names=FALSE)
# import rivers file
rivers_df <- read.csv("/Volumes/LaCie/rivers_df.csv")
# change opacity of basemap
mapatt <- attributes(map)
map_transparent <- matrix(adjustcolor(map, alpha.f = 0.4), nrow = nrow(map))
attributes(map_transparent) <- mapatt
# plot and remove the grid
mygmap <- ggmap(map_transparent) +
theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'white')) +
geom_path(
data = rivers_df,
aes(long, lat, fill = NULL),
color = "#2A788EFF",
alpha = 0.5, size = 2)
mygmap##########
#Map USA
##########
####### make a white and grey map of the US
usmap <- get_stamenmap( bbox = c(left = -85, bottom = 30, right = -73, top = 50), zoom = 8, maptype = "toner-background")
# change opacity of basemap
usmapatt <- attributes(usmap)
usmap_transparent <- matrix(adjustcolor(usmap, alpha.f = 0.4), nrow = nrow(usmap))
attributes(usmap_transparent) <- usmapatt
# plot and remove the grid
usgmap <- ggmap(usmap_transparent) +
theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'white'))Plot Figure 1 Native range
##################
## Pie charts Fig. 1
##################
# import the data filerter for duplicated longitude/latitude locations
all_dedup <- read.csv("/Volumes/LaCie/all_dedup.csv")
head(all_dedup) # your data set with coordinates and infection frequncies## state location publication year lat lon host
## 1 Austria Braunau Riegler and stauffer 2002 1999 48.25314 13.03951 L.x.
## 2 Austria Dürnkrut Riegler and stauffer 2002 1998 48.47180 16.84991 P.a.
## 3 Austria Gerasdorf Riegler and stauffer 2002 1998 48.29458 16.46792 P.a.
## 4 Austria Horitschon Riegler and stauffer 2002 1998 47.58483 16.54784 P.a.
## 5 Austria Kalkgrub Riegler and stauffer 2002 1998 48.42032 15.34183 L.x.
## 6 Austria Neuberg Riegler and stauffer 2002 2000 48.60820 16.57265 P.a.
## inf_status total infected inf_perc time dist
## 1 infected 5 5 1 2 399.774
## 2 infected 5 5 1 1 681.908
## 3 infected 8 8 1 1 651.927
## 4 infected 10 10 1 1 655.909
## 5 infected 5 5 1 1 570.771
## 6 infected 10 10 1 3 663.467
# subset for prior and data from this study
all_dedup$uninfected <- all_dedup$total - all_dedup$infected
past <- subset(all_dedup, publication != "Sonja 2022")
present <- subset(all_dedup, publication == "Sonja 2022")
## Europe
past$radiusW=past$total/12
mycol2 = c("#818689", "#E60001") # red and grey
## wCer2 infection status
pie.europe <- mygmap +
geom_point(data = past,
aes(x = lon, y = lat),
colour = "black", size = 0.7) +
geom_scatterpie(aes(lon, lat, r = 0.25), data = present,
cols = c("uninfected", "infected"),
color="black",
linetype = 1,
#size = 3,
alpha = 0.95,
#coord_fixed(),
legend_name = "legend") +
coord_fixed() +
geom_scatterpie_legend(seq(1, ceiling(max(past$total/12)), length = 1),
x=-9, y=59, labeller = function(x) x * 12) +
xlab("Longitude (°E)") + ylab("Latitude (°N)") +
scale_color_manual(values = mycol2,
aesthetics = c("colour", "fill"))## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
pie.europe#ggsave("/Volumes/LaCie/pie.past.png", plot = pie.europe, width = 15, height = 10, dpi = 300, units = "in", device='png')
#ggsave("/Volumes/LaCie/pie.past.svg", plot = pie.europe, width = 15, height = 10, dpi = 300, units = "in", device='svg')Plot Figure 1 introduced range
popUsa <- read.csv("/Volumes/LaCie/popUsa.csv")
# plot the pie chart
popUsa$radius=popUsa$total/40
mycol3 = c("#818689")
pie.usa <- usgmap +
geom_point(data = popUsa,
aes(x = lon, y = lat),
colour = "black", size = 2.5) +
geom_scatterpie(aes(lon, lat, r = radius), data = popUsa,
cols = c("wCer2positive", "wCer2negative"),
color="black",
alpha = 0.95,
#size = 1,
legend_name = "legend") +
coord_fixed() +
geom_scatterpie_legend(seq(1, ceiling(max(popUsa$total/40)), length = 1),
x=-83.5, y=48.5, labeller = function(x) x * 40) +
xlab("Longitude (°E)") + ylab("Latitude (°S)") +
scale_color_manual(values = mycol3,
aesthetics = c("colour", "fill"))## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
pie.usaCheck Spatial and temporal variation in freq
# load the whole data set
all <- read.csv("/Volumes/LaCie/all.csv")
# extract france
franceall <- subset(all, state == "France")
# split france by year, clumping binomial confidence intervals for locations from the same year
confData <- tibble()
splitData <- split(franceall, franceall$year)
for(i in 1:length(splitData)){
confData[i,1] <- splitData[[i]]$state[1] #id
confData[i,2] <- splitData[[i]]$location[1] #pop
confData[i,3] <- splitData[[i]]$publication[1] # lat
confData[i,4] <- splitData[[i]]$year[1] #lon
confData[i,5] <- splitData[[i]]$lat[1] # year
confData[i,6] <- splitData[[i]]$lon[1] # infection
confData[i,7] <- splitData[[i]]$host[1] # number of infected individuals
confData[i,8] <- splitData[[i]]$inf_status[1] # number of uninfected individuals
confData[i,9] <- splitData[[i]]$total[1]
confData[i,10] <- splitData[[i]]$infected[1]
confData[i,11] <- splitData[[i]]$inf_perc[1]
confData[i,12] <- splitData[[i]]$time[1]
confData[i,13] <- splitData[[i]]$dist[1]
#Estimate infection freq. and 95% binomial confidence intervals
confData[i,14] <- binconf(x=as.numeric(confData[i,10]), n=as.numeric(confData[i,10] + (confData[i, 9] - confData[i,10])))[2]
confData[i,15] <- binconf(x=as.numeric(confData[i,10]), n=as.numeric(confData[i,10] + (confData[i, 9] - confData[i,10])))[3]
}
rm(i)
rm(splitData)
colnames(confData) <- c("state", "location", "publication", "year",
"lat", "lon", "host", "inf_status", "total", "infected", "inf_perc", "time", "dist", "infFreq_lowerCI", "infFreq_upperCI")
# plot
p1 <- ggplot(confData, aes(x=factor(year, level = year), y=inf_perc)) +
geom_errorbar(aes(ymin=infFreq_lowerCI,
ymax=infFreq_upperCI),
width=.2, size=1.5, color=c('grey40', "grey40", "grey40", "grey40")) +
#geom_point(size=4) +
geom_point(size=4, color=c('grey40', "grey40", "grey40", "#ff7700")) +
theme_bw() +
theme(axis.title.y = element_text(vjust= 1),
axis.title.x = element_text(vjust= 0.1),
axis.text.x = element_text(angle = 0, vjust= 0.5),
axis.text = element_text(size = 12),
text = element_text(size = 18),
axis.title = element_text(face = "bold"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
scale_y_continuous(limits = c(0, 1.10), breaks=c(0,0.25,0.5,0.75,1.0)) +
labs(y = "Infection frequency", x = "Sampling year") +
ggtitle("France")
#### Fishers exact test
# France 2021 vs 1999
france1 <- matrix(c(confData$infected[4], confData$total[4]-confData$infected[4],
confData$infected[1], confData$total[1]-confData$infected[1]),
ncol=2)
fisher.test(france1)##
## Fisher's Exact Test for Count Data
##
## data: france1
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0 Inf
## sample estimates:
## odds ratio
## 0
# France 2021 vs 2000
france2 <- matrix(c(confData$infected[4], confData$total[4]-confData$infected[4],
confData$infected[2], confData$total[2]-confData$infected[2]),
ncol=2)
fisher.test(france2)##
## Fisher's Exact Test for Count Data
##
## data: france2
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0 Inf
## sample estimates:
## odds ratio
## 0
# France 2021 vs 20001
france3 <- matrix(c(confData$infected[4], confData$total[4]-confData$infected[4],
confData$infected[3], confData$total[3]-confData$infected[3]),
ncol=2)
fisher.test(france3)##
## Fisher's Exact Test for Count Data
##
## data: france3
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0 Inf
## sample estimates:
## odds ratio
## 0
# extract poland
polandall <- subset(all, state == "Poland")
confData <- tibble()
splitData <- split(polandall, polandall$year)
for(i in 1:length(splitData)){
confData[i,1] <- splitData[[i]]$state[1] #id
confData[i,2] <- splitData[[i]]$location[1] #pop
confData[i,3] <- splitData[[i]]$publication[1] # lat
confData[i,4] <- splitData[[i]]$year[1] #lon
confData[i,5] <- splitData[[i]]$lat[1] # year
confData[i,6] <- splitData[[i]]$lon[1] # infection
confData[i,7] <- splitData[[i]]$host[1] # number of infected individuals
confData[i,8] <- splitData[[i]]$inf_status[1] # number of uninfected individuals
confData[i,9] <- splitData[[i]]$total[1]
confData[i,10] <- splitData[[i]]$infected[1]
confData[i,11] <- splitData[[i]]$inf_perc[1]
confData[i,12] <- splitData[[i]]$time[1]
confData[i,13] <- splitData[[i]]$dist[1]
#Estimate infection freq. and 95% binomial confidence intervals
confData[i,14] <- binconf(x=as.numeric(confData[i,10]), n=as.numeric(confData[i,10] + (confData[i, 9] - confData[i,10])))[2]
confData[i,15] <- binconf(x=as.numeric(confData[i,10]), n=as.numeric(confData[i,10] + (confData[i, 9] - confData[i,10])))[3]
}
rm(i)
rm(splitData)
colnames(confData) <- c("state", "location", "publication", "year",
"lat", "lon", "host", "inf_status", "total", "infected", "inf_perc", "time", "dist", "infFreq_lowerCI", "infFreq_upperCI")
p2 <- ggplot(confData, aes(x=factor(year, level = year), y=inf_perc)) +
geom_errorbar(aes(ymin=infFreq_lowerCI,
ymax=infFreq_upperCI),
width=.2, size=1.5, color=c('grey40', "grey40", "grey40")) +
geom_point(size=4) +
geom_point(size=4, color=c('grey40', "grey40", "#ff7700")) +
theme_bw() +
theme(axis.title.y = element_text(vjust= 1),
axis.title.x = element_text(vjust= 0.1),
axis.text.x = element_text(angle = 0, vjust= 0.5),
axis.text = element_text(size = 12),
text = element_text(size = 18),
axis.title = element_text(face = "bold"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
scale_y_continuous(limits = c(0, 1.10), breaks=c(0,0.25,0.5,0.75,1.0)) +
geom_signif(y_position = c(1.02), xmin = c("2000"), xmax = c("2019"),
annotation=c("*"), tip_length=0.02, color = "grey40", size =0.6, textsize=9) +
labs(y = "", x = "Sampling year") +
ggtitle("Poland")
# Poland 2019 vs 2000
poland1 <- matrix(c(confData$infected[3], confData$total[3]-confData$infected[3],
confData$infected[1], confData$total[1]-confData$infected[1]),
ncol=2)
fisher.test(poland1)##
## Fisher's Exact Test for Count Data
##
## data: poland1
## p-value = 0.04491
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.9401189 197.6473172
## sample estimates:
## odds ratio
## 10.4627
# Poland 2019 vs 2007
poland2 <- matrix(c(confData$infected[3], confData$total[3]-confData$infected[3],
confData$infected[2], confData$total[2]-confData$infected[2]),
ncol=2)
fisher.test(poland2)##
## Fisher's Exact Test for Count Data
##
## data: poland2
## p-value = 0.1718
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.5221963 65.3915049
## sample estimates:
## odds ratio
## 4.616698
# extract austria
austriaall <- subset(all, state == "Austria")
confData <- tibble()
splitData <- split(austriaall, austriaall$year)
for(i in 1:length(splitData)){
confData[i,1] <- splitData[[i]]$state[1] #id
confData[i,2] <- splitData[[i]]$location[1] #pop
confData[i,3] <- splitData[[i]]$publication[1] # lat
confData[i,4] <- splitData[[i]]$year[1] #lon
confData[i,5] <- splitData[[i]]$lat[1] # year
confData[i,6] <- splitData[[i]]$lon[1] # infection
confData[i,7] <- splitData[[i]]$host[1] # number of infected individuals
confData[i,8] <- splitData[[i]]$inf_status[1] # number of uninfected individuals
confData[i,9] <- splitData[[i]]$total[1]
confData[i,10] <- splitData[[i]]$infected[1]
confData[i,11] <- splitData[[i]]$inf_perc[1]
confData[i,12] <- splitData[[i]]$time[1]
confData[i,13] <- splitData[[i]]$dist[1]
#Estimate infection freq. and 95% binomial confidence intervals
confData[i,14] <- binconf(x=as.numeric(confData[i,10]), n=as.numeric(confData[i,10] + (confData[i, 9] - confData[i,10])))[2]
confData[i,15] <- binconf(x=as.numeric(confData[i,10]), n=as.numeric(confData[i,10] + (confData[i, 9] - confData[i,10])))[3]
}
rm(i)
rm(splitData)
colnames(confData) <- c("state", "location", "publication", "year",
"lat", "lon", "host", "inf_status", "total", "infected", "inf_perc", "time", "dist", "infFreq_lowerCI", "infFreq_upperCI")
p3 <- ggplot(confData, aes(x=factor(year, level = year), y=inf_perc)) +
geom_errorbar(aes(ymin=infFreq_lowerCI,
ymax=infFreq_upperCI),
width=.2, size=1.5, color=c('grey40', "grey40", "grey40", "grey40", "grey40", "grey40")) +
geom_point(size=4, color=c('grey40', "grey40", "grey40", "grey40", "grey40", "#ff7700")) +
theme_bw() +
theme(axis.title.y = element_text(vjust= 1),
axis.title.x = element_text(vjust= 0.1),
axis.text.x = element_text(angle = 0, vjust= 0.5),
axis.text = element_text(size = 12),
text = element_text(size = 18),
axis.title = element_text(face = "bold"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
scale_y_continuous(limits = c(0, 1.10), breaks=c(0,0.25,0.5,0.75,1.0)) +
labs(y = "", x = "Sampling year") +
ggtitle("Austria")
# Austria 2021 vs 1998
austria1 <- matrix(c(confData$infected[6], confData$total[6]-confData$infected[6],
confData$infected[1], confData$total[1]-confData$infected[1]),
ncol=2)
fisher.test(austria1)##
## Fisher's Exact Test for Count Data
##
## data: austria1
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0 Inf
## sample estimates:
## odds ratio
## 0
# Austria 2021 vs 1999
austria2 <- matrix(c(confData$infected[6], confData$total[6]-confData$infected[6],
confData$infected[2], confData$total[2]-confData$infected[2]),
ncol=2)
fisher.test(austria2)##
## Fisher's Exact Test for Count Data
##
## data: austria2
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0 Inf
## sample estimates:
## odds ratio
## 0
# Austria 2021 vs 2000
austria3 <- matrix(c(confData$infected[6], confData$total[6]-confData$infected[6],
confData$infected[3], confData$total[3]-confData$infected[3]),
ncol=2)
fisher.test(austria3)##
## Fisher's Exact Test for Count Data
##
## data: austria3
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0 Inf
## sample estimates:
## odds ratio
## 0
# Austria 2021 vs 2008
austria4 <- matrix(c(confData$infected[6], confData$total[6]-confData$infected[6],
confData$infected[4], confData$total[4]-confData$infected[4]),
ncol=2)
fisher.test(austria4)##
## Fisher's Exact Test for Count Data
##
## data: austria4
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.1110773 Inf
## sample estimates:
## odds ratio
## Inf
# Austria 2021 vs 2015
austria5 <- matrix(c(confData$infected[6], confData$total[6]-confData$infected[6],
confData$infected[5], confData$total[5]-confData$infected[5]),
ncol=2)
fisher.test(austria5)##
## Fisher's Exact Test for Count Data
##
## data: austria5
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0 Inf
## sample estimates:
## odds ratio
## 0
# extract greece
greeceall <- subset(all, state == "Greece")
confData <- tibble()
splitData <- split(greeceall, greeceall$year)
for(i in 1:length(splitData)){
confData[i,1] <- splitData[[i]]$state[1] #id
confData[i,2] <- splitData[[i]]$location[1] #pop
confData[i,3] <- splitData[[i]]$publication[1] # lat
confData[i,4] <- splitData[[i]]$year[1] #lon
confData[i,5] <- splitData[[i]]$lat[1] # year
confData[i,6] <- splitData[[i]]$lon[1] # infection
confData[i,7] <- splitData[[i]]$host[1] # number of infected individuals
confData[i,8] <- splitData[[i]]$inf_status[1] # number of uninfected individuals
confData[i,9] <- splitData[[i]]$total[1]
confData[i,10] <- splitData[[i]]$infected[1]
confData[i,11] <- splitData[[i]]$inf_perc[1]
confData[i,12] <- splitData[[i]]$time[1]
confData[i,13] <- splitData[[i]]$dist[1]
#Estimate infection freq. and 95% binomial confidence intervals
confData[i,14] <- binconf(x=as.numeric(confData[i,10]), n=as.numeric(confData[i,10] + (confData[i, 9] - confData[i,10])))[2]
confData[i,15] <- binconf(x=as.numeric(confData[i,10]), n=as.numeric(confData[i,10] + (confData[i, 9] - confData[i,10])))[3]
}
rm(i)
rm(splitData)
colnames(confData) <- c("state", "location", "publication", "year",
"lat", "lon", "host", "inf_status", "total", "infected", "inf_perc", "time", "dist", "infFreq_lowerCI", "infFreq_upperCI")
p4 <- ggplot(confData, aes(x=factor(year, level = year), y=inf_perc)) +
geom_errorbar(aes(ymin=infFreq_lowerCI,
ymax=infFreq_upperCI),
width=.2, size=1.5, color=c('grey40', "grey40", "grey40")) +
geom_point(size=4, color=c('grey40', "grey40", "#ff7700")) +
theme_bw() +
theme(axis.title.y = element_text(vjust= 1),
axis.title.x = element_text(vjust= 0.1),
axis.text.x = element_text(angle = 0, vjust= 0.5),
axis.text = element_text(size = 12),
text = element_text(size = 18),
axis.title = element_text(face = "bold"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
scale_y_continuous(limits = c(0, 1.10), breaks=c(0,0.25,0.5,0.75,1.0)) +
labs(y = "", x = "Sampling year") +
ggtitle("Greece")
# Greece 2019 vs 1999
greece1 <- matrix(c(confData$infected[3], confData$total[3]-confData$infected[3],
confData$infected[1], confData$total[1]-confData$infected[1]),
ncol=2)
fisher.test(greece1)##
## Fisher's Exact Test for Count Data
##
## data: greece1
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0 Inf
## sample estimates:
## odds ratio
## 0
# Greece 2019 vs 2000
greece2 <- matrix(c(confData$infected[3], confData$total[3]-confData$infected[3],
confData$infected[2], confData$total[2]-confData$infected[2]),
ncol=2)
fisher.test(greece2)##
## Fisher's Exact Test for Count Data
##
## data: greece2
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0 Inf
## sample estimates:
## odds ratio
## 0
# combine the plots
compare <- ggarrange(p1, p3, p4, p2, nrow = 1)
compareCreate a spatial grid out of the data and coordinates
meuse <- st_as_sf(x = all_dedup,
coords = c("lon", "lat"),
crs = "+proj=longlat") #+datum=WGS84 +ellps=WGS84 +towgs84=0,0,0
bbox <- st_bbox(meuse)
meuse_grid <- meuse %>%
st_bbox() %>% # determines bounding box coordinates from meuse
st_as_sfc() %>% # creates sfc object from bounding box
st_make_grid( # create grid 0.1 x 0.1 pixel size
cellsize = c(0.1, 0.1),
what = "centers") %>%
st_as_sf(crs=st_crs(meuse)) # convert to sf object
### Convert meuse samples to SpatialPointsDataFrame
meuse_sp <- as(meuse, "Spatial")
### Convert meuse grid to SpatialPixelsDataFrame, the raster/grid equivalent in the sp world
meuse_grid_sp <- as(as(meuse_grid, "Spatial"), "SpatialPixels")First check for anisotropy with a directional map
# Create a "gstat" object
TheGStat <- gstat(id="Infection_frequency", formula=inf_perc ~ 1, data=meuse)
TheVariogram=variogram(TheGStat, map=TRUE, cutoff=2000, width=60)
## check whether there is a north-south, etc. trend with a variogram map
plot(TheVariogram, threshold=10)varmap <- plot(TheVariogram, threshold=10)
png(file="/Volumes/Lacie/varmap.png",
width=8, height=8, units="in", res=300)
varmap
dev.off()## quartz_off_screen
## 2
Ok, directional map look good, no sign of directionality. Now fit a variogram model to this data. If there is no directionality, constant variance will alwas result in the presence of a sill (the curve will have a plateau).
#Create directional empirical variograms at 0, 45, 90, 135 degrees from north (y-axis)
meuse.aniso <- gstat::variogram(inf_perc ~ 1, meuse, cressie = T, cutoff = 2000, width = 60, alpha = c(0, 45, 90, 135))
# plot the empirical variograms
plot(meuse.aniso, xlab = "Distance (km)", ylab = "Semivariance")# Now choose a model - I choose the exponential model here
TheModel=vgm(model='Exp' , anis=c(0, 0.5))
# Fit the model to the variogram
FittedModel <- fit.variogram(meuse.aniso, model=TheModel)
## plot results
plot(meuse.aniso, model=FittedModel, as.table=TRUE, xlab = "Distance (km)", ylab = "Semivariance")varaniso <- plot(meuse.aniso, model=FittedModel, as.table=TRUE, xlab = "Distance (km)", ylab = "Semivariance")
#png(file="/Volumes/Lacie/varaniso.png",
#width=10, height=5, units="in", res=300)
#varaniso
#dev.off()Looks good. All four directions have a distinct plateau.
Since we do not have directionality in our data, the exponential variogram model is appropriate to fit here.
meuse.z <- gstat::variogram(inf_perc ~ 1, cressie = T, meuse, cutoff = 2000, width = 60)
#meuse.v <- gstat::variogram(inf_perc ~ 1, cressie = T, meuse, cutoff = 6500, width = 60)
meuse.wav <- vgm(psill=0.1, model = c("Exp"), range = 100, nugget=0.05, covariance=T)
meuse.vfit <- fit.variogram(meuse.z, meuse.wav)
meuse.vfit # give the theorethcal sill, the nugget and the range## model psill range
## 1 Nug 0.03426046 0.0000
## 2 Exp 0.08032188 193.3023
#plot(meuse.z, meuse.vfit)
## create a fucntion to plot a fitted semivariogram with ggplot
plot_variogram <- function(v, vz, m) {
preds = variogramLine(m, maxdist = max(v$dist))
ggplot() +
theme_classic() +
geom_point(data = vz, aes(x = dist, y = gamma, size=np)) +
geom_line(data = preds, aes(x = dist, y = gamma)) +
geom_vline(xintercept = 193.7, linetype="dotted",
color = "black", size=0.8) +
theme(axis.title.y = element_text(vjust= 1),
axis.title.x = element_text(vjust= 0.1),
axis.text.x = element_text(angle = 0, vjust= 0.5),
axis.text = element_text(size = 12),
text = element_text(size = 18),
axis.title = element_text(face = "bold")) +
labs(x = "Distance (km)", y = "Semivariance", color = "sh")
}
variogram <- plot_variogram(v=meuse.z, vz=meuse.z, m=meuse.vfit)
variogram#ggsave("/Volumes/LaCie/variogram_cut2000_w60.png", plot = variogram, width = 5, height = 5, dpi = 300, units = "in", device='png')Now, using spatial interpolation of the infection frequencies at observed locations, predict infection frequency at unobserved locations using ordinary kriging.
###### make kriging function to test different parameters and models #########
mykrige <- function(data, formula, model, inits1, inits2) {
# create and sf object
meuse <- st_as_sf(x = data,
coords = c("lon", "lat"),
crs = "+proj=longlat") #+datum=WGS84 +ellps=WGS84 +towgs84=0,0,0
bbox <- st_bbox(meuse)
# create a grid
meuse_grid <- meuse %>%
st_bbox() %>% # determines bounding box coordinates from meuse
st_as_sfc() %>% # creates sfc object from bounding box
st_make_grid( # create grid 0.1 x 0.1 pixel size
cellsize = c(0.1, 0.1),
what = "centers") %>%
st_as_sf(crs=st_crs(meuse)) # convert to sf object
### Convert meuse samples to SpatialPointsDataFrame
meuse_sp <- as(meuse, "Spatial")
### Convert meuse grid to SpatialPixelsDataFrame, the raster/grid equivalent in the sp world
meuse_grid_sp <- as(as(meuse_grid, "Spatial"), "SpatialPixels")
meuse.v <- gstat::variogram(formula, meuse, cressie = T, cutoff = inits1[1], width = inits1[2])
meuse.wav <- vgm(psill=inits2[1], model = model, range = inits2[2], nugget=inits2[3], covariance = T)
meuse.vfit <- fit.variogram(meuse.v, meuse.wav)
meuse.vfit
#plot(meuse.vfit)
### ordinary kriging
prediction <- krige(formula = formula, meuse_sp, meuse_grid_sp, meuse.vfit)
prediction
}
# call the function with chosent set of parameters based on the theoretical prediction
mk <- mykrige(data=all_dedup, formula=inf_perc ~ 1, model="Exp", inits1=c(1000, 50), inits2 = c(0.1, 100, 0.05))## [using ordinary kriging]
# Coerce raster to dataframe, including coordinates
mk.df <- mk %>%
as("SpatialPixelsDataFrame") %>%
as.data.frame;
# plot kriging predicted contour lines on the map with Danube river
krigplot <- mygmap +
geom_contour(data=mk.df, aes(x=coords.x1, y=coords.x2, z = var1.pred, colour = ..level..),
binwidth = 0.025,
size = 1.5) +
theme(legend.title = element_text(size = 35),
legend.text = element_text(size = 30),
legend.key.size = unit(1, 'in')) +
scale_color_viridis_c("Prediction", option = "plasma")
krigplot#ggsave("/Volumes/LaCie/krigplot.GLMresid_cut2000_w60.png", plot = krigplot, width = 30, height = 30, dpi = 300, units = "in", device='png')
#ggsave("/Volumes/LaCie/krigplot.MarkdownProba.png", plot = krigplot, width = 30, height = 30, dpi = 300, units = "in", device='png')Mathematical modeling of the intergenrations change in infection frequency of the wCer2 in the introduced range
# load the us population
popUsa <- read.csv("/Volumes/LaCie/popUsa.csv")
# calculate confidence intervals
for(i in 1:nrow(popUsa)){
#Estimate infection freq. and 95% binomial confidence intervals
popUsa[i,10] <- binconf(x=as.numeric(popUsa[i,6]), n=as.numeric(popUsa[i,6]) + as.numeric(popUsa[i,7]))[2]
popUsa[i,11] <- binconf(x=as.numeric(popUsa[i,6]), n=as.numeric(popUsa[i,6]) + as.numeric(popUsa[i,7]))[3]
}
colnames(popUsa) <- c("state", "location", "lat", "lon",
"total", "inf", "uninf", "HT2", "HT1", "infFreq_lowerCI", "infFreq_upperCI")
popUsa$time<- 10
popUsa## state location lat lon total inf uninf HT2 HT1
## 1 New York State Orleans County 43.36667 -78.23333 40 0 40 0 40
## infFreq_lowerCI infFreq_upperCI time
## 1 0 0.0876216 10
## write a function to simulate different invasion scenarios
simInvasion <- function(relF, mu, sh, pw, time) {
inf <- pw
for (i in time) {
pwt <- (relF*(1-mu)*inf[i])/(1-sh*inf[i]*(1-inf[i])-mu*sh*(inf[i])^2)
inf <- c(inf, pwt)
}
print(inf)
params <- data_frame(relFit = rep(relF, length(time)),
matfid = rep(mu, length(time)),
ci = rep(sh, length(time)),
t = time)
tot <- cbind(params, inf)
#print(tot)
}
sim1 <- simInvasion(relF=1, mu=0, sh=0.98, pw=0.01, time=c(0:25))## [1] 0.01000000 0.01009797 0.01019787 0.01029975 0.01040369 0.01050972
## [7] 0.01061793 0.01072838 0.01084114 0.01095628 0.01107388 0.01119402
## [13] 0.01131678 0.01144224 0.01157050 0.01170165 0.01183579 0.01197302
## [19] 0.01211346 0.01225720 0.01240438 0.01255511 0.01270952 0.01286776
## [25] 0.01302996 0.01319627
sim2 <- simInvasion(relF=1.02, mu=0, sh=0.98, pw=0.01, time=c(0:25))## [1] 0.01000000 0.01029993 0.01061194 0.01093671 0.01127497 0.01162750
## [7] 0.01199514 0.01237882 0.01277950 0.01319828 0.01363629 0.01409480
## [13] 0.01457519 0.01507894 0.01560768 0.01616320 0.01674745 0.01736259
## [19] 0.01801098 0.01869525 0.01941827 0.02018326 0.02099379 0.02185385
## [25] 0.02276789 0.02374090
sim3 <- simInvasion(relF=1.06, mu=0, sh=0.98, pw=0.01, time=c(0:25))## [1] 0.01000000 0.01070385 0.01146506 0.01228946 0.01318366 0.01415515
## [7] 0.01521250 0.01636552 0.01762550 0.01900553 0.02052081 0.02218913
## [13] 0.02403146 0.02607262 0.02834227 0.03087610 0.03371741 0.03691924
## [19] 0.04054727 0.04468368 0.04943263 0.05492798 0.06134441 0.06891386
## [25] 0.07795032 0.08888833
sim4 <- simInvasion(relF=1.1, mu=0, sh=0.98, pw=0.01, time=c(0:25))## [1] 0.01000000 0.01110777 0.01235150 0.01375105 0.01532990 0.01711609
## [7] 0.01914330 0.02145239 0.02409328 0.02712770 0.03063275 0.03470599
## [13] 0.03947253 0.04509535 0.05179047 0.05984986 0.06967701 0.08184390
## [19] 0.09718526 0.11696071 0.14314527 0.17897259 0.22998884 0.30611454
## [25] 0.42524513 0.61510070
simall <- rbind(sim1, sim2, sim3, sim4)
simall$variable <- c(rep("sim1", nrow(sim1)), rep("sim2", nrow(sim1)), rep("sim3", nrow(sim3)),
rep("sim4", nrow(sim4))
)
#simall[which(simall$inf > 1),]$inf <- 1
head(simall)## relFit matfid ci t inf variable
## 1 1 0 0.98 0 0.01000000 sim1
## 2 1 0 0.98 1 0.01009797 sim1
## 3 1 0 0.98 2 0.01019787 sim1
## 4 1 0 0.98 3 0.01029975 sim1
## 5 1 0 0.98 4 0.01040369 sim1
## 6 1 0 0.98 5 0.01050972 sim1
gsim <- ggplot(data=simall, aes(x=t, y=inf
,col=variable
)) +
geom_line() +
theme_classic() +
scale_color_manual(values=c('red','blue', 'orange', 'forestgreen'), labels = c("relF = 0.9", "relF= 1", "relF = 1.06", "relF = 1.2"),
guide = guide_legend(reverse=TRUE)) +
scale_y_continuous(limits=c(0,1)) +
scale_x_continuous(limits=c(0, 25)) +
labs(y = "Infection frequency", x = "Generations", color = "relF")
#ggtitle(paste("sh =", sh))
gusa <- ggplot(data=popUsa, aes(x=time , y=inf)) +
geom_point(color="grey40", size=2.5) +
geom_errorbar(aes(ymin=infFreq_lowerCI,
ymax=infFreq_upperCI),
width=0.8, size=1, color="grey40") +
scale_y_continuous(limits=c(0,1)) +
scale_x_continuous(limits=c(0, 25)) +
theme_classic()
g_pw_0.01 <- gusa + geom_line(data=simall, aes(x=t, y=inf, col=variable), size=1) +
theme_classic() +
scale_color_manual(values=c('red','blue', 'orange', 'forestgreen'),
# labels = c("F = 1", "F= 1.02", "F = 1.06", "F = 1.1"),
guide = guide_legend(reverse=TRUE)) +
labs(y = "Infection frequency", x = "Generations", color = "relF") +
theme(axis.title.y = element_text(vjust= 1),
axis.title.x = element_text(vjust= 0.1),
axis.text.x = element_text(angle = 0, vjust= 0.5),
axis.text = element_text(size = 13),
text = element_text(size = 18),
#legend.text = element_text(size = 11),
axis.title = element_text(face = "bold"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.title = element_blank(),
legend.position="none")
#ggtitle(paste("sh =", sh))
g_pw_0.01Mathematical modeling of the wCer2 frequency dynamics and equilibirum in the native range
for(i in 1:nrow(all)){
#Estimate infection freq. and 95% binomial confidence intervals
all[i,14] <- binconf(x=as.numeric(all[i,10]), n=as.numeric(all[i,10] + (all[i, 9] - all[i,10])))[2]
all[i,15] <- binconf(x=as.numeric(all[i,10]), n=as.numeric(all[i,10] + (all[i, 9] - all[i,10])))[3]
}
colnames(all) <- c("state", "location", "publication", "year",
"lat", "lon", "host", "inf_status", "total", "infected", "inf_perc", "time", "dist", "infFreq_lowerCI", "infFreq_upperCI")
all$all <- "all"
# extract bakovic
bak <- subset(all, publication == "Bakovic et al. 2018")
# extract bakovic czeck republic
bakcz<-subset(bak, state != "Hungary")
# extract bakovic hungary
bakhun <- subset(bak, state == "Hungary")
# calculate binomial confidence intervals
confData <- tibble()
splitData <- split(bakhun, bakhun$all)
for(i in 1:length(splitData)){
confData[i,1] <- splitData[[i]]$state[1] #id
confData[i,2] <- splitData[[i]]$location[1] #pop
confData[i,3] <- splitData[[i]]$publication[1] # lat
confData[i,4] <- splitData[[i]]$year[1] #lon
confData[i,5] <- splitData[[i]]$lat[1] # year
confData[i,6] <- splitData[[i]]$lon[1] # infection
confData[i,7] <- splitData[[i]]$host[1] # number of infected individuals
confData[i,8] <- splitData[[i]]$inf_status[1] # number of uninfected individuals
confData[i,9] <- splitData[[i]]$total[1]
confData[i,10] <- splitData[[i]]$infected[1]
confData[i,11] <- splitData[[i]]$inf_perc[1]
confData[i,12] <- splitData[[i]]$time[1]
confData[i,13] <- splitData[[i]]$dist[1]
confData[i,14] <- splitData[[i]]$all[1]
#Estimate infection freq. and 95% binomial confidence intervals
confData[i,15] <- binconf(x=as.numeric(confData[i,10]), n=as.numeric(confData[i,10] + (confData[i, 9] - confData[i,10])))[2]
confData[i,16] <- binconf(x=as.numeric(confData[i,10]), n=as.numeric(confData[i,10] + (confData[i, 9] - confData[i,10])))[3]
}
rm(i)
rm(splitData)
colnames(confData) <- c("state", "location", "publication", "year",
"lat", "lon", "host", "inf_status", "total", "infected", "inf_perc", "time", "dist", "all", "infFreq_lowerCI", "infFreq_upperCI")
relF<- seq(0.8, 1.1, by=0.01) # range of fitness effects
# set different values for maternal transmission
#mu=0.0 # perfect maternal transmission
#mu=0.01 # imperfect maternal transmission
#mu=0.03 # imperfect maternal transmission
mu=0.06 # imperfect maternal transmission
# set different levels of CI and calculate unstable and stable equilibria
sh=0.45 # CI
#phatu1 <- (1-relF)/sh # unstable equilibrium when F(1-mu)<1
phatu1 <- (sh + 1 - relF - sqrt((sh + 1 - relF)^2 + 4* sh * ((relF - relF*mu) - 1) * (1 - relF * mu)))/(2*sh*(1-relF*mu)) # unstable equilibrium when F(1-mu)>1
phats1 <- (sh + 1 - relF + sqrt((sh + 1 - relF)^2 + 4* sh * ((relF - relF*mu) - 1) * (1 - relF * mu)))/(2*sh*(1-relF*mu)) # stable equilibrium when F(1-mu)>1
sh=0.75 # CI
#phatu2 <- (1-relF)/sh
phatu2 <- (sh + 1 - relF - sqrt((sh + 1 - relF)^2 + 4* sh * ((relF - relF*mu) - 1) * (1 - relF * mu)))/(2*sh*(1-relF*mu))
phats2 <- (sh + 1 - relF + sqrt((sh + 1 - relF)^2 + 4* sh * ((relF - relF*mu) - 1) * (1 - relF * mu)))/(2*sh*(1-relF*mu))
sh=0.98 # CI
#phatu3 <- (1-relF)/sh
phatu3 <- (sh + 1 - relF - sqrt((sh + 1 - relF)^2 + 4* sh * ((relF - relF*mu) - 1) * (1 - relF * mu)))/(2*sh*(1-relF*mu))
phats3 <- (sh + 1 - relF + sqrt((sh + 1 - relF)^2 + 4* sh * ((relF - relF*mu) - 1) * (1 - relF * mu)))/(2*sh*(1-relF*mu))
yak_low_data <- as.data.frame(cbind(relF, phatu1
, phatu2, phatu3, phats1, phats2, phats3))
yak_low_data <- melt(yak_low_data, id.vars="relF")
yak_low_data$lineffect <- c(rep("unstable", 3*length(relF)), rep("stable", 3*length(relF)))
# plot for different mus
mu006 <- ggplot(data=yak_low_data, aes(x=relF, y=value, col=variable)) +
#geom_ribbon(aes(ymin=confData$infFreq_lowerCI, ymax=confData$infFreq_upperCI), fill="grey") +
geom_hline(yintercept=confData$inf_perc, color = "grey50", size=0.8) +
geom_hline(yintercept=confData$infFreq_lowerCI, linetype="dotted", color = "grey50", size=0.8) +
geom_line(data=subset(yak_low_data, lineffect=="unstable"), linetype=2, size=1) +
geom_line(data=subset(yak_low_data, lineffect=="stable"), linetype=1, size=1) +
theme_classic() +
theme(axis.title.y = element_text(vjust= 1),
axis.title.x = element_text(vjust= 0.1),
axis.text.x = element_text(angle = 0, vjust= 0.5),
axis.text = element_text(size = 13),
text = element_text(size = 18),
#legend.text = element_text(size = 11),
axis.title = element_text(face = "bold"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.title = element_blank()
,legend.position="none"
) +
scale_color_manual(values=c('lightblue3','plum4', 'darkorange2', 'lightblue3','plum4', 'darkorange2'), labels = c("sh = 0.45", "sh = 0.75", "sh = 0.98", "sh = 0.45", "sh = 0.75", "sh = 0.98"),
guide = guide_legend(reverse=TRUE)) +
scale_y_continuous(limits=c(0,1)) +
labs(y = "Equlibrium frequency", x = "Fitness", color = "sh")
#ggtitle(paste("mu =", mu))
mu006